{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>joy</td>\n",
       "      <td>On days when I feel close to my partner and ot...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>fear</td>\n",
       "      <td>Every time I imagine that someone I love or I ...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>anger</td>\n",
       "      <td>When I had been obviously unjustly treated and...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sadness</td>\n",
       "      <td>When I think about the short time that we live...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>disgust</td>\n",
       "      <td>At a gathering I found myself involuntarily si...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         0                                                  1    2\n",
       "0      joy  On days when I feel close to my partner and ot...  NaN\n",
       "1     fear  Every time I imagine that someone I love or I ...  NaN\n",
       "2    anger  When I had been obviously unjustly treated and...  NaN\n",
       "3  sadness  When I think about the short time that we live...  NaN\n",
       "4  disgust  At a gathering I found myself involuntarily si...  NaN"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reading the Dataset (ISEAR Dataset)\n",
    "data = pd.read_csv('ISEAR.csv',header=None)\n",
    "\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "labels = data[0].values.tolist()\n",
    "sents = data[1].values.tolist()\n",
    "X_train, X_test, y_train, y_test = train_test_split(sents, labels, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "vectorizer = TfidfVectorizer()\n",
    "X_train = vectorizer.fit_transform(X_train)\n",
    "X_test = vectorizer.transform(X_test)\n",
    "\n",
    "# 其他特征可以提取吗？\n",
    "# 1. 词性的特征\n",
    "# 2. n-gram "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/wenzheli/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:652: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of members in any class cannot be less than n_splits=5.\n",
      "  % (min_groups, self.n_splits)), Warning)\n",
      "/Users/wenzheli/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "/Users/wenzheli/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
      "  \"this warning.\", FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'C': 5}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "parameters = {'C':[0.00001, 0.0001, 0.001, 0.005,0.01,0.05, 0.1, 0.5,1,2,5,10]}\n",
    "lr = LogisticRegression()\n",
    "lr.fit(X_train, y_train).score(X_test, y_test)\n",
    "\n",
    "clf = GridSearchCV(lr, parameters, cv=10)\n",
    "clf.fit(X_train, y_train)\n",
    "clf.score(X_test, y_test)\n",
    "print (clf.best_params_)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[101,  35,  17,  26,  11,  18,  19],\n",
       "       [ 23, 122,  14,  18,   8,   5,  14],\n",
       "       [ 12,   7, 141,  12,   8,  11,   9],\n",
       "       [ 25,  10,  11, 110,  12,  17,  24],\n",
       "       [  5,   9,   8,   9, 180,  13,   9],\n",
       "       [ 20,  16,  12,   6,  18, 126,   7],\n",
       "       [ 23,  23,  17,  33,  18,   8, 104]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "confusion_matrix(y_test, clf.predict(X_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
